import pandas as pd
import boto3
import sagemaker
import tensorflow as tf
import numpy as np
import os
# session and role
# Create a SageMaker session bound to the default boto3 region, and fetch the
# IAM execution role of this notebook instance (used for S3 and model access).
sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role()
We will visualize the results from the latest model.
# Load the trained ANN directly from the SageMaker training-job output on S3.
# compile=False skips restoring the saved optimizer state; we re-compile below
# with the optimizer/loss used in training so the model is ready for use.
model_ann = tf.keras.models.load_model('s3://sagemaker-us-east-1-286722933665/tensorflow-training-2021-06-27-13-14-52-016/model/model_ann', compile=False)
model_ann.compile(optimizer='adam', loss='mean_squared_error')
# Check its architecture
model_ann.summary()
# Load the held-out test split. The CSV's first row is a header that we skip,
# so the frame keeps purely numeric rows with integer column labels.
test_data = pd.read_csv(
    'data/train_test/test.csv',
    header=None,
    names=None,
    low_memory=False,
    skiprows=1,
    skipinitialspace=True,
)
# The first 15 columns are the input features; the measured
# distance-to-collision sits in the last column.
test_x = test_data.iloc[:, :15].to_numpy()
test_y = test_data.iloc[:, -1].to_numpy()
# Predict distance-to-collision for every test sample.
y_pred = model_ann.predict(test_x)
y_pred
Collision distance > 0: no collision. Collision distance <= 0: collision.
# Binarize the regression output: a predicted distance > 0 means the vehicle
# stopped in time (label 0, no collision); otherwise label 1 (collision).
col_pred = [0 if dist > 0 else 1 for dist in y_pred.ravel()]
len(col_pred)
# Binarize the ground-truth distances with the same rule, giving the true
# collision/no-collision labels for the test set.
col_true = [0 if dist > 0 else 1 for dist in test_y]
len(col_true)
from sklearn.metrics import confusion_matrix, accuracy_score
# sklearn's convention is confusion_matrix(y_true, y_pred); the original call
# passed predictions first, which transposes the matrix (rows should be the
# true classes). accuracy_score is symmetric, so its value is unaffected.
cm = confusion_matrix(col_true, col_pred)
print(cm)
print(accuracy_score(col_true, col_pred))
# Prediction error on the test set: measured minus predicted distance
# (y_pred has shape (n, 1), so take column 0 for the scalar predictions).
error_pred = [actual - predicted for actual, predicted in zip(test_y, y_pred[:, 0])]
# Human-readable names for the 15 feature columns, in CSV column order.
test_features = ["body_mass", "cogx", "obj_dist", "react_time", "road_mu", "speed", "tire_rr", "pedal.ratio", "boo.ampli", "mc.area", "pf.area", "pf.rbrake", "pr.area", "pr.rbrake", "update_rate"]
# Assemble one frame holding the features plus the derived label, the
# measured distance-to-collision, and the prediction error for plotting.
df_test = pd.DataFrame(test_x, columns=test_features).assign(
    collision=col_true,
    dist_to_col=test_y,
    error_pred=error_pred,
)
df_test.shape
import plotly.express as px
# Histogram of the prediction error, coloured by road-friction value.
fig = px.histogram(df_test, x='error_pred', y=None, color='road_mu')
fig.show()
Conclusion from the coloured histogram above:
For this dataset, we know that there are equal numbers of instances of the 3 'road_mu' (road friction) values. It is clear that the highest error counts occur for road_mu: 1, 0.7 and 0.4, in that order. This implies that the model's prediction of whether a collision occurs is more accurate for lower road_mu values.
An interesting point to note: the reverse is true for the 'magnitude' of the error across the road_mu values. The 'magnitude' of the error is highest for the lowest road_mu (0.4) and lowest for the highest road_mu (1). This implies that the model's prediction of the distance to collision is more accurate for higher road_mu values.
Note: some histogram skew is expected, since we do not have equal numbers of collisions (~67%) and no-collisions (~33%) in the dataset.
# Prediction error against vehicle speed, coloured by collision outcome.
fig = px.scatter(
    df_test,
    x='error_pred',
    y='speed',
    color="collision",
    title="Prediction error distribution with speed",
)
fig.show()
Conclusions from the coloured scatter plot above:
# Load the validation split the same way as the test split (skip the header
# row, keep integer column labels).
validation_data = pd.read_csv(
    'data/train_test/validation_test.csv',
    header=None,
    names=None,
    low_memory=False,
    skiprows=1,
    skipinitialspace=True,
)
# Columns 0-14 are features; column 15 holds the collision flag.
val_x = validation_data.iloc[:, :15].to_numpy()
val_y = validation_data.iloc[:, 15].to_numpy()
# Predict distance-to-collision for every validation sample.
val_pred = model_ann.predict(val_x)
# Binarize the validation predictions: distance > 0 means no collision (0),
# otherwise collision (1) — same rule as for the test set.
col_pred = [0 if dist > 0 else 1 for dist in val_pred.ravel()]
from sklearn.metrics import confusion_matrix, accuracy_score
# (y_true, y_pred) per sklearn convention; the original order transposed the
# matrix. accuracy_score is symmetric, so its value is unchanged.
cm = confusion_matrix(val_y, col_pred)
print(cm)
print(accuracy_score(val_y, col_pred))
# Ground-truth distance-to-collision is in column 16 of the validation CSV.
val_y_dist = validation_data.iloc[:, 16].values
# val_pred (computed above) is already an (n, 1) array: slice out the single
# column instead of re-running model_ann.predict and copying element by
# element. astype keeps the float64 dtype the original list round-trip produced.
pred_val = val_pred[:, 0].astype(np.float64)
import plotly.graph_objects as go
# Sample index for the x-axis of the prediction series.
sample_idx = np.arange(len(val_pred))
# Blue: ANN-predicted distances; orange: simulated (measured) distances,
# annotated with the vehicle speed (column 5) and hover info (column 15).
fig1 = px.scatter(x=sample_idx, y=pred_val, color_discrete_sequence=["blue"])
fig2 = px.scatter(validation_data, y=val_y_dist, color_discrete_sequence=["orange"], hover_name=15, text=5)
# Overlay both traces in a single figure.
fig = go.Figure(data=fig1.data + fig2.data)
fig.update_layout(
    title="ANN prediction (blue) Vs Simulation result (numbered orange)",
    xaxis_title="index",
    yaxis_title="Distance to collision",
)
fig.show()
In the plot above, the numbers on the dots are the vehicle speed values at the moment the road object was detected, just before the emergency braking. Here too, it can be clearly seen that the prediction error margin is higher at higher speeds.
Hover over the data points for more info. Column '5' represents the speed of the vehicle. The orange dots also show whether a collision had occurred ('1') or not ('0').
Calculate prediction error
# Prediction error on the validation set: measured minus predicted distance
# (val_pred has shape (n, 1), so take column 0 for the scalar predictions).
error_pred = [actual - predicted for actual, predicted in zip(val_y_dist, val_pred[:, 0])]
# Same feature names as for the test set, in CSV column order.
test_features = ["body_mass", "cogx", "obj_dist", "react_time", "road_mu", "speed", "tire_rr", "pedal.ratio", "boo.ampli", "mc.area", "pf.area", "pf.rbrake", "pr.area", "pr.rbrake", "update_rate"]
# One frame with features, true collision label, measured distance, and the
# prediction error, ready for plotting.
df_val = pd.DataFrame(val_x, columns=test_features).assign(
    collision=val_y,
    dist_to_col=val_y_dist,
    error_pred=error_pred,
)
# Histogram of the validation prediction error, coloured by pedal ratio.
fig = px.histogram(df_val, x='error_pred', y=None, color='pedal.ratio')
fig.show()
# Validation prediction error against speed, coloured by collision outcome.
fig = px.scatter(
    df_val,
    x='error_pred',
    y='speed',
    color="collision",
    title="Prediction error distribution with speed",
)
fig.show()
The plots for the validation set show a similar outcome to that seen for 'test_data'. These graphs can be plotted for any of the features for further investigation.